diff --git a/v2_1/buildrelease.pl b/v2_1/buildrelease.pl new file mode 100755 index 0000000..ed3a17b --- /dev/null +++ b/v2_1/buildrelease.pl @@ -0,0 +1,18 @@ +#! /usr/bin/perl + +$release_files = 'source/utf8.h source/utf8/core.h source/utf8/checked.h source/utf8/unchecked.h doc/utf8cpp.html doc/ReleaseNotes'; + +# First get the latest version +`svn update`; + +# Then construct the name of the zip file +$argc = @ARGV; +if ($argc > 0) { + $zip_name = $ARGV[0]; +} +else { + $zip_name = "utf8"; +} + +# Zip the files to an archive +`zip $zip_name $release_files`; diff --git a/v2_1/doc/ReleaseNotes b/v2_1/doc/ReleaseNotes new file mode 100644 index 0000000..c061666 --- /dev/null +++ b/v2_1/doc/ReleaseNotes @@ -0,0 +1,18 @@ +utf8 cpp library +Release 2.0 + +This is the first production release of the version 2.0 of the library. Please note that the branch 1.x is still being developed, and will be maintained in the future. + +Changes from version 2.o Beta 1: +- A minor fix in the utf::iterator +- Updated documentation + + +Changes from version 1.02: + +- Feature request [1569543]: Add utf8 iterator adapter class +- Feature request [1569479]: Add replace_invalid function +- Reorganized the source code: rather than a single header file, the library now resides in three separate ones. To maintain the backward compatibility, there is also a convenience header utf8.h that includes all the other ones. +- The requironment for octet_iterator is bidirectional, rather than random access, although in practice it probably doesn't mean a lot. + +Files included in the release: utf8.h, core.h, checked.h, unchecked.h, utf8cpp.html, ReleaseNotes diff --git a/v2_1/doc/utf8cpp.html b/v2_1/doc/utf8cpp.html new file mode 100644 index 0000000..63e9afd --- /dev/null +++ b/v2_1/doc/utf8cpp.html @@ -0,0 +1,1498 @@ + + + + + + + + + UTF8-CPP: UTF-8 with C++ in a Portable Way + + + + +

+ UTF8-CPP: UTF-8 with C++ in a Portable Way +

+

+ The Sourceforge project page +

+
+

+ Table of Contents +

+ +
+

+ Introduction +

+

+ Many C++ developers miss an easy and portable way of handling Unicode encoded + strings. C++ Standard is currently Unicode agnostic, and while some work is being + done to introduce Unicode to the next incarnation called C++0x, for the moment + nothing of the sort is available. In the meantime, developers use 3rd party + libraries like ICU, OS specific capabilities, or simply roll out their own + solutions. +

+

+ In order to easily handle UTF-8 encoded Unicode strings, I have come up with a small + generic library. For anybody used to work with STL algorithms and iterators, it should be + easy and natural to use. The code is freely available for any purpose - check out + the license at the beginning of the utf8.h file. If you run into + bugs or performance issues, please let me know and I'll do my best to address them. +

+

+ The purpose of this article is not to offer an introduction to Unicode in general, + and UTF-8 in particular. If you are not familiar with Unicode, be sure to check out + Unicode Home Page or some other source of + information for Unicode. Also, it is not my aim to advocate the use of UTF-8 + encoded strings in C++ programs; if you want to handle UTF-8 encoded strings from + C++, I am sure you have good reasons for it. +

+

+ Examples of use +

+

+ To illustrate the use of this utf8 library, we shall open a file containing UTF-8 + encoded text, check whether it starts with a byte order mark, read each line into a + std::string, check it for validity, convert the text to UTF-16, and + back to UTF-8: +

+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+#include "utf8.h"
+using namespace std;
+int main()
+{
+    if (argc != 2) {
+        cout << "\nUsage: docsample filename\n";
+        return 0;
+    }
+    const char* test_file_path = argv[1];
+    // Open the test file (must be UTF-8 encoded)
+    ifstream fs8(test_file_path);
+    if (!fs8.is_open()) {
+    cout << "Could not open " << test_file_path << endl;
+    return 0;
+    }
+    // Read the first line of the file
+    unsigned line_count = 1;
+    string line;
+    if (!getline(fs8, line)) 
+        return 0;
+    // Look for utf-8 byte-order mark at the beginning
+    if (line.size() > 2) {
+        if (utf8::is_bom(line.c_str()))
+            cout << "There is a byte order mark at the beginning of the file\n";
+    }
+    // Play with all the lines in the file
+    do {
+       // check for invalid utf-8 (for a simple yes/no check, there is also utf8::is_valid function)
+        string::iterator end_it = utf8::find_invalid(line.begin(), line.end());
+        if (end_it != line.end()) {
+            cout << "Invalid UTF-8 encoding detected at line " << line_count << "\n";
+            cout << "This part is fine: " << string(line.begin(), end_it) << "\n";
+        }
+        // Get the line length (at least for the valid part)
+        int length = utf8::distance(line.begin(), end_it);
+        cout << "Length of line " << line_count << " is " << length <<  "\n";
+        // Convert it to utf-16
+        vector<unsigned short> utf16line;
+        utf8::utf8to16(line.begin(), end_it, back_inserter(utf16line));
+        // And back to utf-8
+        string utf8line; 
+        utf8::utf16to8(utf16line.begin(), utf16line.end(), back_inserter(utf8line));
+        // Confirm that the conversion went OK:
+        if (utf8line != string(line.begin(), end_it))
+            cout << "Error in UTF-16 conversion at line: " << line_count << "\n";        
+        getline(fs8, line);
+        line_count++;
+    } while (!fs8.eof());
+    return 0;
+}
+
+

+ In the previous code sample, we have seen the use of the following functions from + utf8 namespace: first we used is_bom function to detect + UTF-8 byte order mark at the beginning of the file; then for each line we performed + a detection of invalid UTF-8 sequences with find_invalid; the number + of characters (more precisely - the number of Unicode code points) in each line was + determined with a use of utf8::distance; finally, we have converted + each line to UTF-16 encoding with utf8to16 and back to UTF-8 with + utf16to8. +

+

+ Reference +

+

+ Functions From utf8 Namespace +

+

+ utf8::append +

+

+ Available in version 1.0 and later. +

+

+ Encodes a 32 bit code point as a UTF-8 sequence of octets and appends the sequence + to a UTF-8 string. +

+
+template <typename octet_iterator>
+octet_iterator append(uint32_t cp, octet_iterator result);
+   
+
+

+ cp: A 32 bit integer representing a code point to append to the + sequence.
+ result: An output iterator to the place in the sequence where to + append the code point.
+ Return value: An iterator pointing to the place + after the newly appended sequence. +

+

+ Example of use: +

+
+unsigned char u[5] = {0,0,0,0,0};
+unsigned char* end = append(0x0448, u);
+assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0);
+
+

+ Note that append does not allocate any memory - it is the burden of + the caller to make sure there is enough memory allocated for the operation. To make + things more interesting, append can add anywhere between 1 and 4 + octets to the sequence. In practice, you would most often want to use + std::back_inserter to ensure that the necessary memory is allocated. +

+

+ In case of an invalid code point, a utf8::invalid_code_point exception + is thrown. +

+

+ utf8::next +

+

+ Available in version 1.0 and later. +

+

+ Given the iterator to the beginning of the UTF-8 sequence, it returns the code + point and moves the iterator to the next position. +

+
+template <typename octet_iterator> 
+uint32_t next(octet_iterator& it, octet_iterator end);
+   
+
+

+ it: a reference to an iterator pointing to the beginning of an UTF-8 + encoded code point. After the function returns, it is incremented to point to the + beginning of the next code point.
+ end: end of the UTF-8 sequence to be processed. If it + gets equal to end during the extraction of a code point, an + utf8::not_enough_room exception is thrown.
+ Return value: the 32 bit representation of the + processed UTF-8 code point. +

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+char* w = twochars;
+int cp = next(w, twochars + 6);
+assert (cp == 0x65e5);
+assert (w == twochars + 3);
+
+

+ This function is typically used to iterate through a UTF-8 encoded string. +

+

+ In case of an invalid UTF-8 seqence, a utf8::invalid_utf8 exception is + thrown. +

+

+ utf8::prior +

+

+ Available in version 1.02 and later. +

+

+ Given a reference to an iterator pointing to an octet in a UTF-8 seqence, it + decreases the iterator until it hits the beginning of the previous UTF-8 encoded + code point and returns the 32 bits representation of the code point. +

+
+template <typename octet_iterator> 
+uint32_t prior(octet_iterator& it, octet_iterator start);
+   
+
+

+ it: a reference pointing to an octet within a UTF-8 encoded string. + After the function returns, it is decremented to point to the beginning of the + previous code point.
+ start: an iterator to the beginning of the sequence where the search + for the beginning of a code point is performed. It is a + safety measure to prevent passing the beginning of the string in the search for a + UTF-8 lead octet.
+ Return value: the 32 bit representation of the + previous code point. +

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+unsigned char* w = twochars + 3;
+int cp = prior (w, twochars);
+assert (cp == 0x65e5);
+assert (w == twochars);
+
+

+ This function has two purposes: one is two iterate backwards through a UTF-8 + encoded string. Note that it is usually a better idea to iterate forward instead, + since utf8::next is faster. The second purpose is to find a beginning + of a UTF-8 sequence if we have a random position within a string. +

+

+ it will typically point to the beginning of + a code point, and start will point to the + beginning of the string to ensure we don't go backwards too far. it is + decreased until it points to a lead UTF-8 octet, and then the UTF-8 sequence + beginning with that octet is decoded to a 32 bit representation and returned. +

+

+ In case pass_end is reached before a UTF-8 lead octet is hit, or if an + invalid UTF-8 sequence is started by the lead octet, an invalid_utf8 + exception is thrown. +

+

+ utf8::previous +

+

+ Deprecated in version 1.02 and later. +

+

+ Given a reference to an iterator pointing to an octet in a UTF-8 seqence, it + decreases the iterator until it hits the beginning of the previous UTF-8 encoded + code point and returns the 32 bits representation of the code point. +

+
+template <typename octet_iterator> 
+uint32_t previous(octet_iterator& it, octet_iterator pass_start);
+   
+
+

+ it: a reference pointing to an octet within a UTF-8 encoded string. + After the function returns, it is decremented to point to the beginning of the + previous code point.
+ pass_start: an iterator to the point in the sequence where the search + for the beginning of a code point is aborted if no result was reached. It is a + safety measure to prevent passing the beginning of the string in the search for a + UTF-8 lead octet.
+ Return value: the 32 bit representation of the + previous code point. +

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+unsigned char* w = twochars + 3;
+int cp = previous (w, twochars - 1);
+assert (cp == 0x65e5);
+assert (w == twochars);
+
+

+ utf8::previous is deprecated, and utf8::prior should + be used instead, although the existing code can continue using this function. + The problem is the parameter pass_start that points to the position + just before the beginning of the sequence. Standard containers don't have the + concept of "pass start" and the function can not be used with their iterators. +

+

+ it will typically point to the beginning of + a code point, and pass_start will point to the octet just before the + beginning of the string to ensure we don't go backwards too far. it is + decreased until it points to a lead UTF-8 octet, and then the UTF-8 sequence + beginning with that octet is decoded to a 32 bit representation and returned. +

+

+ In case pass_end is reached before a UTF-8 lead octet is hit, or if an + invalid UTF-8 sequence is started by the lead octet, an invalid_utf8 + exception is thrown +

+

+ utf8::advance +

+

+ Available in version 1.0 and later. +

+

+ Advances an iterator by the specified number of code points within an UTF-8 + sequence. +

+
+template <typename octet_iterator, typename distance_type> 
+void advance (octet_iterator& it, distance_type n, octet_iterator end);
+   
+
+

+ it: a reference to an iterator pointing to the beginning of an UTF-8 + encoded code point. After the function returns, it is incremented to point to the + nth following code point.
+ n: a positive integer that shows how many code points we want to + advance.
+ end: end of the UTF-8 sequence to be processed. If it + gets equal to end during the extraction of a code point, an + utf8::not_enough_room exception is thrown.
+

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+unsigned char* w = twochars;
+advance (w, 2, twochars + 6);
+assert (w == twochars + 5);
+
+

+ This function works only "forward". In case of a negative n, there is + no effect. +

+

+ In case of an invalid code point, a utf8::invalid_code_point exception + is thrown. +

+

+ utf8::distance +

+

+ Available in version 1.0 and later. +

+

+ Given the iterators to two UTF-8 encoded code points in a seqence, returns the + number of code points between them. +

+
+template <typename octet_iterator> 
+typename std::iterator_traits<octet_iterator>::difference_type distance (octet_iterator first, octet_iterator last);
+   
+
+

+ first: an iterator to a beginning of a UTF-8 encoded code point.
+ last: an iterator to a "post-end" of the last UTF-8 encoded code + point in the sequence we are trying to determine the length. It can be the + beginning of a new code point, or not.
+ Return value the distance between the iterators, + in code points. +

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+size_t dist = utf8::distance(twochars, twochars + 5);
+assert (dist == 2);
+
+

+ This function is used to find the length (in code points) of a UTF-8 encoded + string. The reason it is called distance, rather than, say, + length is mainly because developers are used that length is an + O(1) function. Computing the length of an UTF-8 string is a linear operation, and + it looked better to model it after std::distance algorithm. +

+

+ In case of an invalid UTF-8 seqence, a utf8::invalid_utf8 exception is + thrown. If last does not point to the past-of-end of a UTF-8 seqence, + a utf8::not_enough_room exception is thrown. +

+

+ utf8::utf16to8 +

+

+ Available in version 1.0 and later. +

+

+ Converts a UTF-16 encoded string to UTF-8. +

+
+template <typename u16bit_iterator, typename octet_iterator>
+octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result);
+   
+
+

+ start: an iterator pointing to the beginning of the UTF-16 encoded + string to convert.
+ end: an iterator pointing to pass-the-end of the UTF-16 encoded + string to convert.
+ result: an output iterator to the place in the UTF-8 string where to + append the result of conversion.
+ Return value: An iterator pointing to the place + after the appended UTF-8 string. +

+

+ Example of use: +

+
+unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
+vector<unsigned char> utf8result;
+utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
+assert (utf8result.size() == 10);    
+
+

+ In case of invalid UTF-16 sequence, a utf8::invalid_utf16 exception is + thrown. +

+

+ utf8::utf8to16 +

+

+ Available in version 1.0 and later. +

+

+ Converts an UTF-8 encoded string to UTF-16 +

+
+template <typename u16bit_iterator, typename octet_iterator>
+u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result);
+   
+
+

+ start: an iterator pointing to the beginning of the UTF-8 encoded + string to convert. < br /> end: an iterator pointing to + pass-the-end of the UTF-8 encoded string to convert.
+ result: an output iterator to the place in the UTF-16 string where to + append the result of conversion.
+ Return value: An iterator pointing to the place + after the appended UTF-16 string. +

+

+ Example of use: +

+
+char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
+vector <unsigned short> utf16result;
+utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
+assert (utf16result.size() == 4);
+assert (utf16result[2] == 0xd834);
+assert (utf16result[3] == 0xdd1e);
+
+

+ In case of an invalid UTF-8 seqence, a utf8::invalid_utf8 exception is + thrown. If end does not point to the past-of-end of a UTF-8 seqence, a + utf8::not_enough_room exception is thrown. +

+

+ utf8::utf32to8 +

+

+ Available in version 1.0 and later. +

+

+ Converts a UTF-32 encoded string to UTF-8. +

+
+template <typename octet_iterator, typename u32bit_iterator>
+octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result);
+   
+
+

+ start: an iterator pointing to the beginning of the UTF-32 encoded + string to convert.
+ end: an iterator pointing to pass-the-end of the UTF-32 encoded + string to convert.
+ result: an output iterator to the place in the UTF-8 string where to + append the result of conversion.
+ Return value: An iterator pointing to the place + after the appended UTF-8 string. +

+

+ Example of use: +

+
+int utf32string[] = {0x448, 0x65E5, 0x10346, 0};
+vector<unsigned char> utf8result;
+utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
+assert (utf8result.size() == 9);
+
+

+ In case of invalid UTF-32 string, a utf8::invalid_code_point exception + is thrown. +

+

+ utf8::utf8to32 +

+

+ Available in version 1.0 and later. +

+

+ Converts a UTF-8 encoded string to UTF-32. +

+
+template <typename octet_iterator, typename u32bit_iterator>
+u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result);
+   
+
+

+ start: an iterator pointing to the beginning of the UTF-8 encoded + string to convert.
+ end: an iterator pointing to pass-the-end of the UTF-8 encoded string + to convert.
+ result: an output iterator to the place in the UTF-32 string where to + append the result of conversion.
+ Return value: An iterator pointing to the place + after the appended UTF-32 string. +

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+vector<int> utf32result;
+utf8to32(twochars, twochars + 5, back_inserter(utf32result));
+assert (utf32result.size() == 2);
+
+

+ In case of an invalid UTF-8 seqence, a utf8::invalid_utf8 exception is + thrown. If end does not point to the past-of-end of a UTF-8 seqence, a + utf8::not_enough_room exception is thrown. +

+

+ utf8::find_invalid +

+

+ Available in version 1.0 and later. +

+

+ Detects an invalid sequence within a UTF-8 string. +

+
+template <typename octet_iterator> 
+octet_iterator find_invalid(octet_iterator start, octet_iterator end);
+
+

+ start: an iterator pointing to the beginning of the UTF-8 string to + test for validity.
+ end: an iterator pointing to pass-the-end of the UTF-8 string to test + for validity.
+ Return value: an iterator pointing to the first + invalid octet in the UTF-8 string. In case none were found, equals + end. +

+

+ Example of use: +

+
+char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa";
+char* invalid = find_invalid(utf_invalid, utf_invalid + 6);
+assert (invalid == utf_invalid + 5);
+
+

+ This function is typically used to make sure a UTF-8 string is valid before + processing it with other functions. It is especially important to call it if before + doing any of the unchecked operations on it. +

+

+ utf8::is_valid +

+

+ Available in version 1.0 and later. +

+

+ Checks whether a sequence of octets is a valid UTF-8 string. +

+
+template <typename octet_iterator> 
+bool is_valid(octet_iterator start, octet_iterator end);
+   
+
+

+ start: an iterator pointing to the beginning of the UTF-8 string to + test for validity.
+ end: an iterator pointing to pass-the-end of the UTF-8 string to test + for validity.
+ Return value: true if the sequence + is a valid UTF-8 string; false if not. +

+ Example of use: +
+char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa";
+bool bvalid = is_valid(utf_invalid, utf_invalid + 6);
+assert (bvalid == false);
+
+

+ is_valid is a shorthand for find_invalid(start, end) == + end;. You may want to use it to make sure that a byte seqence is a valid + UTF-8 string without the need to know where it fails if it is not valid. +

+

+ utf8::replace_invalid +

+

+ Available in version 2.0 and later. +

+

+ Replaces all invalid UTF-8 sequences within a string with a replacement marker. +

+
+template <typename octet_iterator, typename output_iterator>
+output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement);
+template <typename octet_iterator, typename output_iterator>
+output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out);
+   
+
+

+ start: an iterator pointing to the beginning of the UTF-8 string to + look for invalid UTF-8 sequences.
+ end: an iterator pointing to pass-the-end of the UTF-8 string to look + for invalid UTF-8 sequences.
+ out: An output iterator to the range where the result of replacement + is stored.
+ replacement: A Unicode code point for the replacement marker. The + version without this parameter assumes the value 0xfffd
+ Return value: An iterator pointing to the place + after the UTF-8 string with replaced invalid sequences. +

+

+ Example of use: +

+
+char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
+vector<char> replace_invalid_result;
+replace_invalid (invalid_sequence, invalid_sequence + sizeof(invalid_sequence), back_inserter(replace_invalid_result), '?');
+bvalid = is_valid(replace_invalid_result.begin(), replace_invalid_result.end());
+assert (bvalid);
+char* fixed_invalid_sequence = "a????z";
+assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(), fixed_invalid_sequence));
+
+

+ replace_invalid does not perform in-place replacement of invalid + sequences. Rather, it produces a copy of the original string with the invalid + sequences replaced with a replacement marker. Therefore, out must not + be in the [start, end] range. +

+

+ If end does not point to the past-of-end of a UTF-8 sequence, a + utf8::not_enough_room exception is thrown. +

+

+ utf8::is_bom +

+

+ Available in version 1.0 and later. +

+

+ Checks whether a sequence of three octets is a UTF-8 byte order mark (BOM) +

+
+template <typename octet_iterator> 
+bool is_bom (octet_iterator it);
+
+

+ it: beginning of the 3-octet sequence to check
+ Return value: true if the sequence + is UTF-8 byte order mark; false if not. +

+

+ Example of use: +

+
+unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf};
+bool bbom = is_bom(byte_order_mark);
+assert (bbom == true);
+
+

+ The typical use of this function is to check the first three bytes of a file. If + they form the UTF-8 BOM, we want to skip them before processing the actual UTF-8 + encoded text. +

+

+ Types From utf8 Namespace +

+

+ utf8::iterator +

+

+ Available in version 2.0 and later. +

+

+ Adapts the underlying octet iterator to iterate over the sequence of code points, + rather than raw octets. +

+
+template <typename octet_iterator>
+class iterator;
+
+ +
Member functions
+
+
iterator();
the deafult constructor; the underlying octet_iterator is + constructed with its default constructor. +
explicit iterator (const octet_iterator& octet_it, + const octet_iterator& range_start, + const octet_iterator& range_end);
a constructor + that initializes the underlying octet_iterator with octet_it + and sets the range in which the iterator is considered valid. +
octet_iterator base () const;
returns the + underlying octet_iterator. +
uint32_t operator * () const;
decodes the utf-8 sequence + the underlying octet_iterator is pointing to and returns the code point. +
bool operator == (const iterator& rhs) + const;
returns true + if the two underlaying iterators are equal. +
bool operator != (const iterator& rhs) + const;
returns true + if the two underlaying iterators are not equal. +
iterator& operator ++ ();
the prefix increment - moves + the iterator to the next UTF-8 encoded code point. +
iterator operator ++ (int);
+ the postfix increment - moves the iterator to the next UTF-8 encoded code point and returns the current one. +
iterator& operator -- ();
the prefix decrement - moves + the iterator to the previous UTF-8 encoded code point. +
iterator operator -- (int);
+ the postfix decrement - moves the iterator to the previous UTF-8 encoded code point and returns the current one. +
+

+ Example of use: +

+
+char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
+utf8::iterator<char*> it(threechars, threechars, threechars + 9);
+utf8::iterator<char*> it2 = it;
+assert (it2 == it);
+assert (*it == 0x10346);
+assert (*(++it) == 0x65e5);
+assert ((*it++) == 0x65e5);
+assert (*it == 0x0448);
+assert (it != it2);
+utf8::iterator<char*> endit (threechars + 9, threechars, threechars + 9);  
+assert (++it == endit);
+assert (*(--it) == 0x0448);
+assert ((*it--) == 0x0448);
+assert (*it == 0x65e5);
+assert (--it == utf8::iterator<char*>(threechars, threechars, threechars + 9));
+assert (*it == 0x10346);
+
+

+ The purpose of utf8::iterator adapter is to enable easy iteration as well as the use of STL + algorithms with UTF-8 encoded strings. Increment and decrement operators are implemented in terms of + utf8::next() and utf8::prior() functions. +

+

+ Note that utf8::iterator adapter is a checked iterator. It operates on the range specified in + the constructor; any attempt to go out of that range will result in an exception. Even the comparison operators + require both iterator object to be constructed against the same range - otherwise an exception is thrown. Typically, + the range will be determined by sequence container functions begin and end, i.e.: +

+
+std::string s = "example";
+utf8::iterator i (s.begin(), s.begin(), s.end());
+
+

+ Functions From utf8::unchecked Namespace +

+

+ utf8::unchecked::append +

+

+ Available in version 1.0 and later. +

+

+ Encodes a 32 bit code point as a UTF-8 sequence of octets and appends the sequence + to a UTF-8 string. +

+
+template <typename octet_iterator>
+octet_iterator append(uint32_t cp, octet_iterator result);
+   
+
+

+ cp: A 32 bit integer representing a code point to append to the + sequence.
+ result: An output iterator to the place in the sequence where to + append the code point.
+ Return value: An iterator pointing to the place + after the newly appended sequence. +

+

+ Example of use: +

+
+unsigned char u[5] = {0,0,0,0,0};
+unsigned char* end = unchecked::append(0x0448, u);
+assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0);
+
+

+ This is a faster but less safe version of utf8::append. It does not + check for validity of the supplied code point, and may produce an invalid UTF-8 + sequence. +

+

+ utf8::unchecked::next +

+

+ Available in version 1.0 and later. +

+

+ Given the iterator to the beginning of a UTF-8 sequence, it returns the code point + and moves the iterator to the next position. +

+
+template <typename octet_iterator>
+uint32_t next(octet_iterator& it);
+   
+
+

+ it: a reference to an iterator pointing to the beginning of an UTF-8 + encoded code point. After the function returns, it is incremented to point to the + beginning of the next code point.
+ Return value: the 32 bit representation of the + processed UTF-8 code point. +

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+char* w = twochars;
+int cp = unchecked::next(w);
+assert (cp == 0x65e5);
+assert (w == twochars + 3);
+
+

+ This is a faster but less safe version of utf8::next. It does not + check for validity of the supplied UTF-8 sequence. +

+

+ utf8::unchecked::prior +

+

+ Available in version 1.02 and later. +

+

+ Given a reference to an iterator pointing to an octet in a UTF-8 seqence, it + decreases the iterator until it hits the beginning of the previous UTF-8 encoded + code point and returns the 32 bits representation of the code point. +

+
+template <typename octet_iterator>
+uint32_t prior(octet_iterator& it);
+   
+
+

+ it: a reference pointing to an octet within a UTF-8 encoded string. + After the function returns, it is decremented to point to the beginning of the + previous code point.
+ Return value: the 32 bit representation of the + previous code point. +

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+char* w = twochars + 3;
+int cp = unchecked::prior (w);
+assert (cp == 0x65e5);
+assert (w == twochars);
+
+

+ This is a faster but less safe version of utf8::prior. It does not + check for validity of the supplied UTF-8 sequence and offers no boundary checking. +

+

+ utf8::unchecked::previous (deprecated, see utf8::unchecked::prior) +

+

+ Deprecated in version 1.02 and later. +

+

+ Given a reference to an iterator pointing to an octet in a UTF-8 seqence, it + decreases the iterator until it hits the beginning of the previous UTF-8 encoded + code point and returns the 32 bits representation of the code point. +

+
+template <typename octet_iterator>
+uint32_t previous(octet_iterator& it);
+   
+
+

+ it: a reference pointing to an octet within a UTF-8 encoded string. + After the function returns, it is decremented to point to the beginning of the + previous code point.
+ Return value: the 32 bit representation of the + previous code point. +

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+char* w = twochars + 3;
+int cp = unchecked::previous (w);
+assert (cp == 0x65e5);
+assert (w == twochars);
+
+

+ The reason this function is deprecated is just the consistency with the "checked" + versions, where prior should be used instead of previous. + In fact, unchecked::previous behaves exactly the same as + unchecked::prior +

+

+ This is a faster but less safe version of utf8::previous. It does not + check for validity of the supplied UTF-8 sequence and offers no boundary checking. +

+

+ utf8::unchecked::advance +

+

+ Available in version 1.0 and later. +

+

+ Advances an iterator by the specified number of code points within an UTF-8 + sequence. +

+
+template <typename octet_iterator, typename distance_type>
+void advance (octet_iterator& it, distance_type n);
+   
+
+

+ it: a reference to an iterator pointing to the beginning of an UTF-8 + encoded code point. After the function returns, it is incremented to point to the + nth following code point.
+ n: a positive integer that shows how many code points we want to + advance.
+

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+char* w = twochars;
+unchecked::advance (w, 2);
+assert (w == twochars + 5);
+
+

+ This function works only "forward". In case of a negative n, there is + no effect. +

+

+ This is a faster but less safe version of utf8::advance. It does not + check for validity of the supplied UTF-8 sequence and offers no boundary checking. +

+

+ utf8::unchecked::distance +

+

+ Available in version 1.0 and later. +

+

+ Given the iterators to two UTF-8 encoded code points in a seqence, returns the + number of code points between them. +

+
+template <typename octet_iterator>
+typename std::iterator_traits<octet_iterator>::difference_type distance (octet_iterator first, octet_iterator last);
+
+

+ first: an iterator to a beginning of a UTF-8 encoded code point.
+ last: an iterator to a "post-end" of the last UTF-8 encoded code + point in the sequence we are trying to determine the length. It can be the + beginning of a new code point, or not.
+ Return value the distance between the iterators, + in code points. +

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+size_t dist = utf8::unchecked::distance(twochars, twochars + 5);
+assert (dist == 2);
+
+

+ This is a faster but less safe version of utf8::distance. It does not + check for validity of the supplied UTF-8 sequence. +

+

+ utf8::unchecked::utf16to8 +

+

+ Available in version 1.0 and later. +

+

+ Converts a UTF-16 encoded string to UTF-8. +

+
+template <typename u16bit_iterator, typename octet_iterator>
+octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result);
+   
+
+

+ start: an iterator pointing to the beginning of the UTF-16 encoded + string to convert.
+ end: an iterator pointing to pass-the-end of the UTF-16 encoded + string to convert.
+ result: an output iterator to the place in the UTF-8 string where to + append the result of conversion.
+ Return value: An iterator pointing to the place + after the appended UTF-8 string. +

+

+ Example of use: +

+
+unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
+vector<unsigned char> utf8result;
+unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
+assert (utf8result.size() == 10);    
+
+

+ This is a faster but less safe version of utf8::utf16to8. It does not + check for validity of the supplied UTF-16 sequence. +

+

+ utf8::unchecked::utf8to16 +

+

+ Available in version 1.0 and later. +

+

+ Converts an UTF-8 encoded string to UTF-16 +

+
+template <typename u16bit_iterator, typename octet_iterator>
+u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result);
+   
+
+

+ start: an iterator pointing to the beginning of the UTF-8 encoded + string to convert. < br /> end: an iterator pointing to + pass-the-end of the UTF-8 encoded string to convert.
+ result: an output iterator to the place in the UTF-16 string where to + append the result of conversion.
+ Return value: An iterator pointing to the place + after the appended UTF-16 string. +

+

+ Example of use: +

+
+char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
+vector <unsigned short> utf16result;
+unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
+assert (utf16result.size() == 4);
+assert (utf16result[2] == 0xd834);
+assert (utf16result[3] == 0xdd1e);
+
+

+ This is a faster but less safe version of utf8::utf8to16. It does not + check for validity of the supplied UTF-8 sequence. +

+

+ utf8::unchecked::utf32to8 +

+

+ Available in version 1.0 and later. +

+

+ Converts a UTF-32 encoded string to UTF-8. +

+
+template <typename octet_iterator, typename u32bit_iterator>
+octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result);
+   
+
+

+ start: an iterator pointing to the beginning of the UTF-32 encoded + string to convert.
+ end: an iterator pointing to pass-the-end of the UTF-32 encoded + string to convert.
+ result: an output iterator to the place in the UTF-8 string where to + append the result of conversion.
+ Return value: An iterator pointing to the place + after the appended UTF-8 string. +

+

+ Example of use: +

+
+int utf32string[] = {0x448, 0x65e5, 0x10346, 0};
+vector<unsigned char> utf8result;
+utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
+assert (utf8result.size() == 9);
+
+

+ This is a faster but less safe version of utf8::utf32to8. It does not + check for validity of the supplied UTF-32 sequence. +

+

+ utf8::unchecked::utf8to32 +

+

+ Available in version 1.0 and later. +

+

+ Converts a UTF-8 encoded string to UTF-32. +

+
+template <typename octet_iterator, typename u32bit_iterator>
+u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result);
+   
+
+

+ start: an iterator pointing to the beginning of the UTF-8 encoded + string to convert.
+ end: an iterator pointing to pass-the-end of the UTF-8 encoded string + to convert.
+ result: an output iterator to the place in the UTF-32 string where to + append the result of conversion.
+ Return value: An iterator pointing to the place + after the appended UTF-32 string. +

+

+ Example of use: +

+
+char* twochars = "\xe6\x97\xa5\xd1\x88";
+vector<int> utf32result;
+unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result));
+assert (utf32result.size() == 2);
+
+

+ This is a faster but less safe version of utf8::utf8to32. It does not + check for validity of the supplied UTF-8 sequence. +

+

+ Types From utf8::unchecked Namespace +

+

+ utf8::iterator +

+

+ Available in version 2.0 and later. +

+

+ Adapts the underlying octet iterator to iterate over the sequence of code points, + rather than raw octets. +

+
+template <typename octet_iterator>
+class iterator;
+
+ +
Member functions
+
+
iterator();
the deafult constructor; the underlying octet_iterator is + constructed with its default constructor. +
explicit iterator (const octet_iterator& octet_it); +
a constructor + that initializes the underlying octet_iterator with octet_it +
octet_iterator base () const;
returns the + underlying octet_iterator. +
uint32_t operator * () const;
decodes the utf-8 sequence + the underlying octet_iterator is pointing to and returns the code point. +
bool operator == (const iterator& rhs) + const;
returns true + if the two underlaying iterators are equal. +
bool operator != (const iterator& rhs) + const;
returns true + if the two underlaying iterators are not equal. +
iterator& operator ++ ();
the prefix increment - moves + the iterator to the next UTF-8 encoded code point. +
iterator operator ++ (int);
+ the postfix increment - moves the iterator to the next UTF-8 encoded code point and returns the current one. +
iterator& operator -- ();
the prefix decrement - moves + the iterator to the previous UTF-8 encoded code point. +
iterator operator -- (int);
+ the postfix decrement - moves the iterator to the previous UTF-8 encoded code point and returns the current one. +
+

+ Example of use: +

+
+char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
+utf8::unchecked::iterator<char*> un_it(threechars);
+utf8::unchecked::iterator<char*> un_it2 = un_it;
+assert (un_it2 == un_it);
+assert (*un_it == 0x10346);
+assert (*(++un_it) == 0x65e5);
+assert ((*un_it++) == 0x65e5);
+assert (*un_it == 0x0448);
+assert (un_it != un_it2);
+utf8::::unchecked::iterator<char*> un_endit (threechars + 9);  
+assert (++un_it == un_endit);
+assert (*(--un_it) == 0x0448);
+assert ((*un_it--) == 0x0448);
+assert (*un_it == 0x65e5);
+assert (--un_it == utf8::unchecked::iterator<char*>(threechars));
+assert (*un_it == 0x10346);
+
+

+ This is an unchecked version of utf8::iterator. It is faster in many cases, but offers + no validity or range checks. +

+

+ Points of interest +

+

+ Design goals and decisions +

+

+ The library was designed to be: +

+
    +
  1. + Generic: for better or worse, there are many C++ string classes out there, and + the library should work with as many of them as possible. +
  2. +
  3. + Portable: the library should be portable both accross different platforms and + compilers. The only non-portable code is a small section that declares unsigned + integers of different sizes: three typedefs. They can be changed by the users of + the library if they don't match their platform. The default setting should work + for Windows (both 32 and 64 bit), and most 32 bit and 64 bit Unix derivatives. +
  4. +
  5. + Lightweight: follow the "pay only for what you use" guidline. +
  6. +
  7. + Unintrusive: avoid forcing any particular design or even programming style on the + user. This is a library, not a framework. +
  8. +
+

+ Alternatives +

+

+ In case you want to look into other means of working with UTF-8 strings from C++, + here is the list of solutions I am aware of: +

+
    +
  1. + ICU Library. It is very powerful, + complete, feature-rich, mature, and widely used. Also big, intrusive, + non-generic, and doesn't play well with the Standard Library. I definitelly + recommend looking at ICU even if you don't plan to use it. +
  2. +
  3. + Glib::ustring. + A class specifically made to work with UTF-8 strings, and also feel like + std::string. If you prefer to have yet another string class in your + code, it may be worth a look. Be aware of the licensing issues, though. +
  4. +
  5. + Platform dependent solutions: Windows and POSIX have functions to convert strings + from one encoding to another. That is only a subset of what my library offers, + but if that is all you need it may be good enough, especially given the fact that + these functions are mature and tested in production. +
  6. +
+

+ Conclusion +

+

+ Until Unicode becomes officially recognized by the C++ Standard Library, we need to + use other means to work with UTF-8 strings. Template functions I describe in this + article may be a good step in this direction. +

+ +
    +
  1. + The Unicode Consortium. +
  2. +
  3. + ICU Library. +
  4. +
  5. + UTF-8 at Wikipedia +
  6. +
  7. + UTF-8 and Unicode FAQ for + Unix/Linux +
  8. +
+ + diff --git a/v2_1/samples/Makefile b/v2_1/samples/Makefile new file mode 100644 index 0000000..6cdd3c8 --- /dev/null +++ b/v2_1/samples/Makefile @@ -0,0 +1,5 @@ +CC = g++ +CFLAGS = -g -Wall -pedantic + +docsample: docsample.cpp ../source/utf8.h + $(CC) $(CFLAGS) docsample.cpp -odocsample diff --git a/v2_1/samples/docsample.cpp b/v2_1/samples/docsample.cpp new file mode 100644 index 0000000..fc8f6f3 --- /dev/null +++ b/v2_1/samples/docsample.cpp @@ -0,0 +1,63 @@ +#include "../source/utf8.h" +#include +#include +#include +#include + + +using namespace std; + +int main(int argc, char** argv) +{ + if (argc != 2) { + cout << "\nUsage: docsample filename\n"; + return 0; + } + const char* test_file_path = argv[1]; + // Open the test file (must be UTF-8 encoded) + ifstream fs8(test_file_path); + if (!fs8.is_open()) { + cout << "Could not open " << test_file_path << endl; + return 0; + } + + // Read the first line of the file + unsigned line_count = 1; + string line; + if (!getline(fs8, line)) + return 0; + + // Look for utf-8 byte-order mark at the beginning + if (line.size() > 2) { + if (utf8::is_bom(line.c_str())) + cout << "There is a byte order mark at the beginning of the file\n"; + } + + // Play with all the lines in the file + do { + // check for invalid utf-8 (for a simple yes/no check, there is also utf8::is_valid function) + string::iterator end_it = utf8::find_invalid(line.begin(), line.end()); + if (end_it != line.end()) { + cout << "Invalid UTF-8 encoding detected at line " << line_count << "\n"; + cout << "This part is fine: " << string(line.begin(), end_it) << "\n"; + } + // Get the line length (at least for the valid part) + int length = utf8::distance(line.begin(), end_it); + cout << "Length of line " << line_count << " is " << length << "\n"; + + // Convert it to utf-16 + vector utf16line; + utf8::utf8to16(line.begin(), end_it, back_inserter(utf16line)); + // And back to utf-8; + string utf8line; + utf8::utf16to8(utf16line.begin(), utf16line.end(), back_inserter(utf8line)); + // Confirm that the conversion went OK: + if (utf8line != string(line.begin(), end_it)) + cout << "Error in UTF-16 conversion at line: " << line_count << "\n"; + + getline(fs8, line); + line_count++; + } while (!fs8.eof()); + + return 0; +} diff --git a/v2_1/source/utf8.h b/v2_1/source/utf8.h new file mode 100644 index 0000000..4e44514 --- /dev/null +++ b/v2_1/source/utf8.h @@ -0,0 +1,34 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "utf8/checked.h" +#include "utf8/unchecked.h" + +#endif // header guard diff --git a/v2_1/source/utf8/checked.h b/v2_1/source/utf8/checked.h new file mode 100644 index 0000000..dc342ff --- /dev/null +++ b/v2_1/source/utf8/checked.h @@ -0,0 +1,312 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "core.h" +#include + +namespace utf8 +{ + // Exceptions that may be thrown from the library functions. + class invalid_code_point : public std::exception { + uint32_t cp; + public: + invalid_code_point(uint32_t cp) : cp(cp) {} + virtual const char* what() const throw() { return "Invalid code point"; } + uint32_t code_point() const {return cp;} + }; + + class invalid_utf8 : public std::exception { + uint8_t u8; + public: + invalid_utf8 (uint8_t u) : u8(u) {} + virtual const char* what() const throw() { return "Invalid UTF-8"; } + uint8_t utf8_octet() const {return u8;} + }; + + class invalid_utf16 : public std::exception { + uint16_t u16; + public: + invalid_utf16 (uint16_t u) : u16(u) {} + virtual const char* what() const throw() { return "Invalid UTF-16"; } + uint16_t utf16_word() const {return u16;} + }; + + class not_enough_room : public std::exception { + public: + virtual const char* what() const throw() { return "Not enough space"; } + }; + + /// The library API - functions intended to be called by the users + + template + output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) + { + while (start != end) { + octet_iterator sequence_start = start; + internal::utf_error err_code = internal::validate_next(start, end); + switch (err_code) { + case internal::OK : + for (octet_iterator it = sequence_start; it != start; ++it) + *out++ = *it; + break; + case internal::NOT_ENOUGH_ROOM: + throw not_enough_room(); + case internal::INVALID_LEAD: + append (replacement, out); + ++start; + break; + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: + case internal::INVALID_CODE_POINT: + append (replacement, out); + ++start; + // just one replacement mark for the sequence + while (internal::is_trail(*start) && start != end) + ++start; + break; + } + } + return out; + } + + template + inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) + { + static const uint32_t replacement_marker = internal::mask16(0xfffd); + return replace_invalid(start, end, out, replacement_marker); + } + + template + octet_iterator append(uint32_t cp, octet_iterator result) + { + if (!internal::is_code_point_valid(cp)) + throw invalid_code_point(cp); + + if (cp < 0x80) // one octet + *(result++) = static_cast(cp); + else if (cp < 0x800) { // two octets + *(result++) = static_cast((cp >> 6) | 0xc0); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else if (cp < 0x10000) { // three octets + *(result++) = static_cast((cp >> 12) | 0xe0); + *(result++) = static_cast((cp >> 6) & 0x3f | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else if (cp <= internal::CODE_POINT_MAX) { // four octets + *(result++) = static_cast((cp >> 18) | 0xf0); + *(result++) = static_cast((cp >> 12)& 0x3f | 0x80); + *(result++) = static_cast((cp >> 6) & 0x3f | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else + throw invalid_code_point(cp); + + return result; + } + + template + uint32_t next(octet_iterator& it, octet_iterator end) + { + uint32_t cp = 0; + internal::utf_error err_code = internal::validate_next(it, end, &cp); + switch (err_code) { + case internal::OK : + break; + case internal::NOT_ENOUGH_ROOM : + throw not_enough_room(); + case internal::INVALID_LEAD : + case internal::INCOMPLETE_SEQUENCE : + case internal::OVERLONG_SEQUENCE : + throw invalid_utf8(*it); + case internal::INVALID_CODE_POINT : + throw invalid_code_point(cp); + } + return cp; + } + + template + uint32_t prior(octet_iterator& it, octet_iterator start) + { + octet_iterator end = it; + while (internal::is_trail(*(--it))) + if (it < start) + throw invalid_utf8(*it); // error - no lead byte in the sequence + octet_iterator temp = it; + return next(temp, end); + } + + /// Deprecated in versions that include "prior" + template + uint32_t previous(octet_iterator& it, octet_iterator pass_start) + { + octet_iterator end = it; + while (internal::is_trail(*(--it))) + if (it == pass_start) + throw invalid_utf8(*it); // error - no lead byte in the sequence + octet_iterator temp = it; + return next(temp, end); + } + + template + void advance (octet_iterator& it, distance_type n, octet_iterator end) + { + for (distance_type i = 0; i < n; ++i) + next(it, end); + } + + template + typename std::iterator_traits::difference_type + distance (octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + next(first, last); + return dist; + } + + template + octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) { + uint32_t cp = internal::mask16(*start++); + // Take care of surrogate pairs first + if (internal::is_surrogate(cp)) { + if (start != end) { + uint32_t trail_surrogate = internal::mask16(*start++); + if (trail_surrogate >= internal::TRAIL_SURROGATE_MIN && trail_surrogate <= internal::TRAIL_SURROGATE_MAX) + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + else + throw invalid_utf16(static_cast(trail_surrogate)); + } + else + throw invalid_utf16(static_cast(*start)); + + } + result = append(cp, result); + } + return result; + } + + template + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start != end) { + uint32_t cp = next(start, end); + if (cp > 0xffff) { //make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } + + template + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = append(*(start++), result); + + return result; + } + + template + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start < end) + (*result++) = next(start, end); + + return result; + } + + // The iterator class + template + class iterator : public std::iterator { + octet_iterator it; + octet_iterator range_start; + octet_iterator range_end; + public: + iterator () {}; + explicit iterator (const octet_iterator& octet_it, + const octet_iterator& range_start, + const octet_iterator& range_end) : + it(octet_it), range_start(range_start), range_end(range_end) + { + if (it < range_start || it > range_end) + throw std::out_of_range("Invalid utf-8 iterator position"); + } + // the default "big three" are OK + octet_iterator base () const { return it; } + uint32_t operator * () const + { + octet_iterator temp = it; + return next(temp, range_end); + } + bool operator == (const iterator& rhs) const + { + if (range_start != rhs.range_start || range_end != rhs.range_end) + throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); + return (it == rhs.it); + } + bool operator != (const iterator& rhs) const + { + return !(operator == (rhs)); + } + iterator& operator ++ () + { + next(it, range_end); + return *this; + } + iterator operator ++ (int) + { + iterator temp = *this; + next(it, range_end); + return temp; + } + iterator& operator -- () + { + prior(it, range_start); + return *this; + } + iterator operator -- (int) + { + iterator temp = *this; + prior(it, range_start); + return temp; + } + }; // class iterator + +} // namespace utf8 + +#endif //header guard + + diff --git a/v2_1/source/utf8/core.h b/v2_1/source/utf8/core.h new file mode 100644 index 0000000..ca8c94a --- /dev/null +++ b/v2_1/source/utf8/core.h @@ -0,0 +1,259 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include + +namespace utf8 +{ + // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers + // You may need to change them to match your system. + // These typedefs have the same names as ones from cstdint, or boost/cstdint + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + +// Helper code - not intended to be directly called by the library users. May be changed at any time +namespace internal +{ + // Unicode constants + // Leading (high) surrogates: 0xd800 - 0xdbff + // Trailing (low) surrogates: 0xdc00 - 0xdfff + const uint16_t LEAD_SURROGATE_MIN = 0xd800u; + const uint16_t LEAD_SURROGATE_MAX = 0xdbffu; + const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u; + const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu; + const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); + const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; + + // Maximum valid value for a Unicode code point + const uint32_t CODE_POINT_MAX = 0x0010ffffu; + + template + inline uint8_t mask8(octet_type oc) + { + return static_cast(0xff & oc); + } + template + inline uint16_t mask16(u16_type oc) + { + return static_cast(0xffff & oc); + } + template + inline bool is_trail(octet_type oc) + { + return ((mask8(oc) >> 6) == 0x2); + } + + template + inline bool is_surrogate(u16 cp) + { + return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); + } + + template + inline bool is_code_point_valid(u32 cp) + { + return (cp <= CODE_POINT_MAX && !is_surrogate(cp) && cp != 0xfffe && cp != 0xffff); + } + + template + inline typename std::iterator_traits::difference_type + sequence_length(octet_iterator lead_it) + { + uint8_t lead = mask8(*lead_it); + if (lead < 0x80) + return 1; + else if ((lead >> 5) == 0x6) + return 2; + else if ((lead >> 4) == 0xe) + return 3; + else if ((lead >> 3) == 0x1e) + return 4; + else + return 0; + } + + enum utf_error {OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; + + template + utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t* code_point) + { + uint32_t cp = mask8(*it); + // Check the lead octet + typedef typename std::iterator_traits::difference_type octet_difference_type; + octet_difference_type length = sequence_length(it); + + // "Shortcut" for ASCII characters + if (length == 1) { + if (end - it > 0) { + if (code_point) + *code_point = cp; + ++it; + return OK; + } + else + return NOT_ENOUGH_ROOM; + } + + // Do we have enough memory? + if (std::distance(it, end) < length) + return NOT_ENOUGH_ROOM; + + // Check trail octets and calculate the code point + switch (length) { + case 0: + return INVALID_LEAD; + break; + case 2: + if (is_trail(*(++it))) { + cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); + } + else { + --it; + return INCOMPLETE_SEQUENCE; + } + break; + case 3: + if (is_trail(*(++it))) { + cp = ((cp << 12) & 0xffff) + ((mask8(*it) << 6) & 0xfff); + if (is_trail(*(++it))) { + cp += (*it) & 0x3f; + } + else { + std::advance(it, -2); + return INCOMPLETE_SEQUENCE; + } + } + else { + --it; + return INCOMPLETE_SEQUENCE; + } + break; + case 4: + if (is_trail(*(++it))) { + cp = ((cp << 18) & 0x1fffff) + ((mask8(*it) << 12) & 0x3ffff); + if (is_trail(*(++it))) { + cp += (mask8(*it) << 6) & 0xfff; + if (is_trail(*(++it))) { + cp += (*it) & 0x3f; + } + else { + std::advance(it, -3); + return INCOMPLETE_SEQUENCE; + } + } + else { + std::advance(it, -2); + return INCOMPLETE_SEQUENCE; + } + } + else { + --it; + return INCOMPLETE_SEQUENCE; + } + break; + } + // Is the code point valid? + if (!is_code_point_valid(cp)) { + for (octet_difference_type i = 0; i < length - 1; ++i) + --it; + return INVALID_CODE_POINT; + } + + if (code_point) + *code_point = cp; + + if (cp < 0x80) { + if (length != 1) { + std::advance(it, -(length-1)); + return OVERLONG_SEQUENCE; + } + } + else if (cp < 0x800) { + if (length != 2) { + std::advance(it, -(length-1)); + return OVERLONG_SEQUENCE; + } + } + else if (cp < 0x10000) { + if (length != 3) { + std::advance(it, -(length-1)); + return OVERLONG_SEQUENCE; + } + } + + ++it; + return OK; + } + + template + inline utf_error validate_next(octet_iterator& it, octet_iterator end) { + return validate_next(it, end, 0); + } + +} // namespace internal + + /// The library API - functions intended to be called by the users + + // Byte order mark + const uint8_t bom[] = {0xef, 0xbb, 0xbf}; + + template + octet_iterator find_invalid(octet_iterator start, octet_iterator end) + { + octet_iterator result = start; + while (result != end) { + internal::utf_error err_code = internal::validate_next(result, end); + if (err_code != internal::OK) + return result; + } + return result; + } + + template + inline bool is_valid(octet_iterator start, octet_iterator end) + { + return (find_invalid(start, end) == end); + } + + template + inline bool is_bom (octet_iterator it) + { + return ( + (internal::mask8(*it++)) == bom[0] && + (internal::mask8(*it++)) == bom[1] && + (internal::mask8(*it)) == bom[2] + ); + } +} // namespace utf8 + +#endif // header guard + + diff --git a/v2_1/source/utf8/unchecked.h b/v2_1/source/utf8/unchecked.h new file mode 100644 index 0000000..2277d28 --- /dev/null +++ b/v2_1/source/utf8/unchecked.h @@ -0,0 +1,221 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "core.h" + +namespace utf8 +{ + namespace unchecked + { + template + octet_iterator append(uint32_t cp, octet_iterator result) + { + if (cp < 0x80) // one octet + *(result++) = static_cast(cp); + else if (cp < 0x800) { // two octets + *(result++) = static_cast((cp >> 6) | 0xc0); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else if (cp < 0x10000) { // three octets + *(result++) = static_cast((cp >> 12) | 0xe0); + *(result++) = static_cast((cp >> 6) & 0x3f | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else { // four octets + *(result++) = static_cast((cp >> 18) | 0xf0); + *(result++) = static_cast((cp >> 12)& 0x3f | 0x80); + *(result++) = static_cast((cp >> 6) & 0x3f | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + return result; + } + template + uint32_t next(octet_iterator& it) + { + uint32_t cp = internal::mask8(*it); + typename std::iterator_traits::difference_type length = utf8::internal::sequence_length(it); + switch (length) { + case 1: + break; + case 2: + it++; + cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); + break; + case 3: + ++it; + cp = ((cp << 12) & 0xffff) + ((internal::mask8(*it) << 6) & 0xfff); + ++it; + cp += (*it) & 0x3f; + break; + case 4: + ++it; + cp = ((cp << 18) & 0x1fffff) + ((internal::mask8(*it) << 12) & 0x3ffff); + ++it; + cp += (internal::mask8(*it) << 6) & 0xfff; + ++it; + cp += (*it) & 0x3f; + break; + } + ++it; + return cp; + } + + template + uint32_t prior(octet_iterator& it) + { + while (internal::is_trail(*(--it))) ; + octet_iterator temp = it; + return next(temp); + } + + // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) + template + inline uint32_t previous(octet_iterator& it) + { + return prior(it); + } + + template + void advance (octet_iterator& it, distance_type n) + { + for (distance_type i = 0; i < n; ++i) + next(it); + } + + template + typename std::iterator_traits::difference_type + distance (octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + next(first); + return dist; + } + + template + octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) { + uint32_t cp = internal::mask16(*start++); + // Take care of surrogate pairs first + if (internal::is_surrogate(cp)) { + uint32_t trail_surrogate = internal::mask16(*start++); + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + } + result = append(cp, result); + } + return result; + } + + template + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start != end) { + uint32_t cp = next(start); + if (cp > 0xffff) { //make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } + + template + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = append(*(start++), result); + + return result; + } + + template + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start < end) + (*result++) = next(start); + + return result; + } + + // The iterator class + template + class iterator : public std::iterator { + octet_iterator it; + public: + iterator () {}; + explicit iterator (const octet_iterator& octet_it): it(octet_it) {} + // the default "big three" are OK + octet_iterator base () const { return it; } + uint32_t operator * () const + { + octet_iterator temp = it; + return next(temp); + } + bool operator == (const iterator& rhs) const + { + return (it == rhs.it); + } + bool operator != (const iterator& rhs) const + { + return !(operator == (rhs)); + } + iterator& operator ++ () + { + std::advance(it, internal::sequence_length(it)); + return *this; + } + iterator operator ++ (int) + { + iterator temp = *this; + std::advance(it, internal::sequence_length(it)); + return temp; + } + iterator& operator -- () + { + prior(it); + return *this; + } + iterator operator -- (int) + { + iterator temp = *this; + prior(it); + return temp; + } + }; // class iterator + + } // namespace utf8::unchecked +} // namespace utf8 + + +#endif // header guard + diff --git a/v2_1/test_data/negative/utf8_invalid.txt b/v2_1/test_data/negative/utf8_invalid.txt new file mode 100755 index 0000000..abd16f7 Binary files /dev/null and b/v2_1/test_data/negative/utf8_invalid.txt differ diff --git a/v2_1/test_data/utf8samples/UTF-8-demo.txt b/v2_1/test_data/utf8samples/UTF-8-demo.txt new file mode 100644 index 0000000..4363f27 --- /dev/null +++ b/v2_1/test_data/utf8samples/UTF-8-demo.txt @@ -0,0 +1,212 @@ + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ diff --git a/v2_1/test_data/utf8samples/Unicode_transcriptions.html b/v2_1/test_data/utf8samples/Unicode_transcriptions.html new file mode 100644 index 0000000..69b29ff --- /dev/null +++ b/v2_1/test_data/utf8samples/Unicode_transcriptions.html @@ -0,0 +1,167 @@ +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + diff --git a/v2_1/test_data/utf8samples/big.txt b/v2_1/test_data/utf8samples/big.txt new file mode 100644 index 0000000..1038482 --- /dev/null +++ b/v2_1/test_data/utf8samples/big.txt @@ -0,0 +1,420160 @@ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. +? *Unicode Transcriptions* Notes <#Notes> + +Glyphs | Samples + | Charts + | UTF + | Forms + | +Home . + + +Name Text Image +Arabic (Arabic) يونِكود ? +Arabic (Persian) یونی‌کُد / ?/ +Armenian Յունիկօդ +Bengali য়ূনিকোড +Bopomofo ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ +ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ +Braille +Buhid +Canadian Aboriginal ᔫᗂᑰᑦ +Cherokee ᏳᏂᎪᏛ +Cypriot +Cyrillic (Russian) Юникод ? +Deseret (English) ??????? +Devanagari (Hindi) यूनिकोड ? +Ethiopic ዩኒኮድ +Georgian უნიკოდი ? +Gothic +Greek Γιούνικοντ +Gujarati યૂનિકોડ +Gurmukhi ਯੂਨਿਕੋਡ +Han (Chinese) 统一码 ? +統一碼 ? +万国码 ? +萬國碼 ? +Hangul 유니코드 +Hanunoo +Hebrew יוניקוד +Hebrew (pointed) יוּנִיקוׁד +Hebrew (Yiddish) יוניקאָד ? +Hiragana (Japanese) ゆにこおど +Katakana (Japanese) ユニコード ? +Kannada ಯೂನಿಕೋಡ್ +Khmer យូនីគោដ +Lao +Latin Unicode Unicode +Latin (IPA <#English_Pronunciation>) ˈjunɪˌkoːd ? +Latin (Am. Dict. <#American_Dictionary>) Ūnĭcōde̽ ? +Limbu +Linear B +Malayalam യൂനികോഡ് +Mongolian +Myanmar +Ogham ᚔᚒᚅᚔᚉᚑᚇ / / +Old Italic +Oriya ୟୂନିକୋଡ +Osmanya +Runic (Anglo-Saxon) ᛡᚢᚾᛁᚳᚩᛞ +Shavian +Sinhala යණනිකෞද් +Syriac ܝܘܢܝܩܘܕ +Tagbanwa +Tagalog +Tai Le +Tamil யூனிகோட் +Telugu యూనికోడ్ +Thaana +Thai ยูนืโคด +Tibetan (Dzongkha) ཨུ་ནི་ཀོཌྲ། +Ugaritic +Yi + + + Notes: + +There are different ways to transcribe the word “Unicode”, depending on +the language and script. In some cases there is only one language that +customarily uses a given script; in others there are many languages. The +goal here is at a minimum to collect at least one transcription for each +script in a language customarily written in that script, with more +languages if possible. If the transcription is the same for multiple +languages in a script, then a single representative language is used. + +Still missing are transcriptions for the items above in RED (in at least +one language). I would appreciate any other transcriptions, or +corrections for the ones listed here. Send to mark3@macchiato.com +, using the directions below: + + * *Supplying Missing Items* + o Most Latin-script languages will follow the spelling, and + change the pronunciation. For any that would not, it would + be good to have the alternate spelling. + o For non-Latin scripts the goal is to match the English + pronunciation — /*not*/ spelling. Above is the IPA <#IPA> + (in phonemic transcription) that should be matched as + closely as possible (without sounding affected in the target + language) + o Text would be best in either the UTF-8 text, or the code + points in hex HTML. E.g. either of the following: + + "Юникод" + + "Юникод" + + Note: for / supplementary characters/ + , + there should be one hex number per code point, not two + surrogates + : + # 𐀀 /*not*/ �&xDC00; + o If you have a good font, I'd also appreciate a GIF. It + should be *96 x 24* bits, with the text centered, in black + on white (plus grays if smoothed). + * *Other Comments* + o Because some browsers won't handle the text, both text and + GIF image are supplied. If you can’t read the text columns, + see Display Problems + . + o The Chinese versions (inc. Bopomofo) are translations, not + transcriptions, since "transcription in Chinese is pretty + lame" [J. Becker]. + o There are other "translations" of Unicode that may be in + use, such as the Vietnamese "Thống Nhất Mã". + o For sample pages in different languages on the Unicode site, + see What is Unicode? + + o Americans are not generally used to IPA, and find a variety + of different systems in their dictionaries. This one leaves + the base letters as they are, and uses diacritics for + pronunciation. + * *Etymology of /Unicode/* + o Coined by J. Becker. Not related to previous usages, such as: + + A telegraphic code in which one word or set of letters + represents a sentence or phrase; a telegram or message + in this. (late 19th century, OED) + o According to my references, the prefix "uni" is directly + from Latin while the word "code" is through French. + o The original Indo-European apparently would have been + *oino-kau-do ("one strike give"): *kau apparently being + related to such English words as: hew, haggle, hoe, hag, + hay, hack, caudad, caudal, caudate, caudex, coda, codex, + codicil, coward, incus, and Kovač (personal name: "smith"). + + I will leave the exact derivations to the exegetes, + but I like the association with "haggle" myself. + * *Contributions* + o This draws on contributions or comments from: + + Dixon Au + + Joe Becker + + Maurice Bauhahn + + Abel Cheung + + Peter Constable + + Michael Everson + + Christopher John Fynn + + Michael Kaplan + + George Kiraz + + Abdul Malik + + Siva Nataraja + + Roozbeh Pournader + + Jonathan Rosenne + + Jungshik Shin + +------------------------------------------------------------------------ + + +Terms of Use . Last updated: +MED - 04/20/2003 15:30:33. + + + + + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + ▝▀▘▙▄▟ diff --git a/v2_1/test_data/utf8samples/quickbrown.txt b/v2_1/test_data/utf8samples/quickbrown.txt new file mode 100644 index 0000000..5db9443 --- /dev/null +++ b/v2_1/test_data/utf8samples/quickbrown.txt @@ -0,0 +1,126 @@ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2001-09-02 + +This file is UTF-8 encoded. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. diff --git a/v2_1/test_drivers/Makefile b/v2_1/test_drivers/Makefile new file mode 100644 index 0000000..a2cbe4c --- /dev/null +++ b/v2_1/test_drivers/Makefile @@ -0,0 +1,19 @@ +CC = g++ +CFLAGS = -g + +all: smoketest regressiontest negativetest utf8readertest + +smoketest: + cd smoke_test && $(MAKE) $@ + +regressiontest: + cd regression_tests && $(MAKE) $@ + +negativetest: + cd negative && $(MAKE) $@ + +utf8readertest: + cd utf8reader && $(MAKE) $@ + +clean: + rm smoke_test/smoketest regression_tests/regressiontest negative/negative utf8reader/utf8reader diff --git a/v2_1/test_drivers/negative/Makefile b/v2_1/test_drivers/negative/Makefile new file mode 100644 index 0000000..00f4a7d --- /dev/null +++ b/v2_1/test_drivers/negative/Makefile @@ -0,0 +1,5 @@ +CC = g++ +CFLAGS = -g -Wall -pedantic + +negativetest: negative.cpp ../../source/utf8.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h + $(CC) $(CFLAGS) negative.cpp -onegative diff --git a/v2_1/test_drivers/negative/negative.cpp b/v2_1/test_drivers/negative/negative.cpp new file mode 100644 index 0000000..8c910d1 --- /dev/null +++ b/v2_1/test_drivers/negative/negative.cpp @@ -0,0 +1,45 @@ +#include "../../source/utf8.h" +using namespace utf8; + +#include +#include +#include +#include +using namespace std; + +const char* TEST_FILE_PATH = "../../test_data/negative/utf8_invalid.txt"; +const unsigned INVALID_LINES[] = { 75, 76, 82, 83, 84, 85, 93, 102, 103, 105, 106, 107, 108, 109, 110, 114, 115, 116, 117, 124, 125, 130, 135, 140, 145, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 169, 175, 176, 177, 207, 208, 209, 210, 211, 220, 221, 222, 223, 224, 232, 233, 234, 235, 236, 247, 248, 249, 250, 251, 252, 253, 257, 258, 259, 260, 261, 262, 263, 264, 268, 269}; +const unsigned* INVALID_LINES_END = INVALID_LINES + sizeof(INVALID_LINES)/sizeof(unsigned); + +int main() +{ + // Open the test file + ifstream fs8(TEST_FILE_PATH); + if (!fs8.is_open()) { + cout << "Could not open " << TEST_FILE_PATH << endl; + return 0; + } + + // Read it line by line + unsigned int line_count = 0; + char byte; + while (!fs8.eof()) { + string line; + while ((byte = static_cast(fs8.get())) != '\n' && !fs8.eof()) + line.push_back(byte); + + line_count++; + // Print out lines that contain invalid UTF-8 + if (!is_valid(line.begin(), line.end())) { + const unsigned* u = find(INVALID_LINES, INVALID_LINES_END, line_count); + if (u == INVALID_LINES_END) + cout << "Unexpected invalid utf-8 at line " << line_count << '\n'; + + // try fixing it: + string fixed_line; + replace_invalid(line.begin(), line.end(), back_inserter(fixed_line)); + if (!is_valid(fixed_line.begin(), fixed_line.end())) + cout << "replace_invalid() resulted in an invalid utf-8 at line " << line_count << '\n'; + } + } +} diff --git a/v2_1/test_drivers/performance/Makefile b/v2_1/test_drivers/performance/Makefile new file mode 100644 index 0000000..5229558 --- /dev/null +++ b/v2_1/test_drivers/performance/Makefile @@ -0,0 +1,5 @@ +CC = g++ +CFLAGS = -O2 + +iconvtest: iconvtest.cpp ../../source/utf8.h timer.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h + $(CC) $(CFLAGS) iconvtest.cpp -oiconvtest diff --git a/v2_1/test_drivers/performance/iconvtest.cpp b/v2_1/test_drivers/performance/iconvtest.cpp new file mode 100644 index 0000000..2f1c27c --- /dev/null +++ b/v2_1/test_drivers/performance/iconvtest.cpp @@ -0,0 +1,125 @@ +#include +#include "../../source/utf8.h" +#include "timer.h" +#include +#include +#include +using namespace std; + +using namespace utf8; + +int main(int argc, char** argv) +{ + if (argc != 2) { + cout << "\nUsage: iconvtest filename\n"; + return 0; + } + const char* test_file_path = argv[1]; + // Open the test file (UTF-8 encoded text) + ifstream fs8(test_file_path, ios::binary); + if (!fs8.is_open()) { + cout << "Could not open " << test_file_path << endl; + return 0; + } + // get length + fs8.seekg(0, ios::end); + int length = fs8.tellg(); + fs8.seekg(0, ios::beg); + + // allocate the buffer (no vector - we are benchmarking conversions, not STL + char* buf = new char[length]; + char* end_buf = buf + length; + // fill the data + fs8.read(buf, length); + fs8.close(); + // the UTF-16 result will not be larger than this (I hope :) ) + vector temputf16; + utf8::utf8to16(buf, end_buf, back_inserter(temputf16)); + int wlength = temputf16.size(); + unsigned short* utf16buf = new unsigned short[wlength]; + + cout << "UTF8 to UTF-16\n"; + { + memset (utf16buf, 0 , wlength * sizeof(unsigned short)); + // utf-8 cpp: + cout << "utf8::utf8to16: "; + timer t(cout); + utf8::utf8to16(buf, buf + length, utf16buf); + } + + { + memset (utf16buf, 0 , wlength * sizeof(unsigned short)); + // utf-8 cpp: + cout << "unchecked::utf8to16: "; + timer t(cout); + utf8::unchecked::utf8to16(buf, buf + length, utf16buf); + } + + // the UTF-16 result will not be larger than this (I hope :) ) + unsigned short* utf16iconvbuf = new unsigned short[wlength]; + { + memset (utf16iconvbuf, 0 , wlength * sizeof(unsigned short)); + // iconv + cout << "iconv: "; + + iconv_t cd = iconv_open("UTF-16LE", "UTF-8"); + if (cd == iconv_t(-1)) { + cout << "Error openning the iconv stream"; + return 0; + } + char* inbuf = buf; + size_t in_bytes_left = length; + char* outbuf = (char*)utf16iconvbuf; + size_t out_bytes_left = wlength * sizeof (unsigned short); + { + timer t(cout); + iconv(cd, &inbuf, &in_bytes_left, &outbuf, &out_bytes_left); + } + iconv_close(cd); + } + + // just check the correctness while we are here: + if (!equal(utf16buf, utf16buf + wlength, utf16iconvbuf)) + cout << "Different result!!!\n"; + + // the other way around + cout << "UTF16 to UTF-8\n"; + { + //iconv + memset(buf, 0, length); + cout<< "iconv: "; + + iconv_t cd = iconv_open("UTF-8", "UTF-16LE"); + if (cd == iconv_t(-1)) { + cout << "Error openning the iconv stream"; + return 0; + } + char* inbuf = (char*)utf16buf; + size_t in_bytes_left = wlength * sizeof(unsigned short); + char* outbuf =buf; + size_t out_bytes_left = length; + { + timer t(cout); + iconv(cd, &inbuf, &in_bytes_left, &outbuf, &out_bytes_left); + } + iconv_close(cd); + } + + { + memset (buf, 0 , length); + // utf-8 cpp: + cout << "unchecked::utf16to8: "; + timer t(cout); + utf8::unchecked::utf16to8(utf16buf, utf16buf + wlength, buf); + } + + { + memset (buf, 0 , length); + cout << "utf16to8: "; + timer t(cout); + utf8::utf16to8(utf16buf, utf16buf + wlength, buf); + } + + delete [] buf; + delete [] utf16buf; +} diff --git a/v2_1/test_drivers/performance/timer.h b/v2_1/test_drivers/performance/timer.h new file mode 100644 index 0000000..170dea3 --- /dev/null +++ b/v2_1/test_drivers/performance/timer.h @@ -0,0 +1,21 @@ +#include +#include +struct timer { + timer(std::ostream& report) : report(report) + {start = std::clock();} + ~timer() + { + using namespace std; + end = clock(); + unsigned milliseconds = (end - start)*1000 / CLOCKS_PER_SEC; + report << "Spent " << milliseconds << "ms here\n"; + } + + std::clock_t start; + std::clock_t end; + std::ostream& report; + +private: + // just to surpress a VC++ 8.0 warning + timer& operator = (const timer&) {}; +}; diff --git a/v2_1/test_drivers/performance/win32.cpp b/v2_1/test_drivers/performance/win32.cpp new file mode 100644 index 0000000..9577920 --- /dev/null +++ b/v2_1/test_drivers/performance/win32.cpp @@ -0,0 +1,105 @@ +#include +#include "../../source/utf8.h" +#include "timer.h" +#include +#include +#include +using namespace std; + +using namespace utf8; + +int main(int argc, char** argv) +{ + if (argc != 2) { + cout << "\nUsage: win32test filename\n"; + return 0; + } + const char* test_file_path = argv[1]; + // Open the test file (UTF-8 encoded text) + ifstream fs8(test_file_path, ios::binary); + if (!fs8.is_open()) { + cout << "Could not open " << test_file_path << endl; + return 0; + } + // get length + fs8.seekg(0, ios::end); + int length = fs8.tellg(); + fs8.seekg(0, ios::beg); + + // allocate the buffer (no vector - we are benchmarking conversions, not STL + char* buf = new char[length]; + // fill the data + fs8.read(buf, length); + fs8.close(); + cout << "UTF8 > UTF16\n"; + // the UTF-16 result will not be larger than this (I hope :) ) + vector temputf16; + utf8::utf8to16(buf, buf + length, back_inserter(temputf16)); + vector::size_type wlength = temputf16.size(); + wchar_t* utf16buf = new wchar_t[wlength]; + + { + memset (utf16buf, 0 , wlength * sizeof(wchar_t)); + // utf-8 cpp: + cout << "utf8::utf8to16: "; + timer t(cout); + utf8::utf8to16(buf, buf + length, utf16buf); + } + + { + memset (utf16buf, 0 , wlength * sizeof(wchar_t)); + // utf-8 cpp: + cout << "unchecked::utf8to16: "; + timer t(cout); + utf8::unchecked::utf8to16(buf, buf + length, utf16buf); + } + + // the UTF-16 result will not be larger than this (I hope :) ) + wchar_t* utf16iconvbuf = new wchar_t[wlength]; + { + memset (utf16iconvbuf, 0 , wlength * sizeof(wchar_t)); + // win32 + cout << "win32: "; + + { + timer t(cout); + MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buf, length, utf16iconvbuf, int(wlength)); + } + + } + + // just check the correctness while we are here: + if (!equal(utf16buf, utf16buf + wlength, utf16iconvbuf)) + cout << "Different result!!!"; + + // the other way around + cout << "UTF16 to UTF-8\n"; + { + //win32 + memset(buf, 0, length); + cout<< "win32: "; + + { + timer t(cout); + WideCharToMultiByte(CP_UTF8, 0, utf16buf, int(wlength), buf, length, NULL, NULL); + } + } + + { + memset (buf, 0 , length); + // utf-8 cpp: + cout << "unchecked::utf16to8: "; + timer t(cout); + utf8::unchecked::utf16to8(utf16buf, utf16buf + wlength, buf); + } + + { + memset (buf, 0 , length); + cout << "utf16to8: "; + timer t(cout); + utf8::utf16to8(utf16buf, utf16buf + wlength, buf); + } + + delete [] buf; + delete [] utf16buf; +} diff --git a/v2_1/test_drivers/regression_tests/Makefile b/v2_1/test_drivers/regression_tests/Makefile new file mode 100644 index 0000000..133f828 --- /dev/null +++ b/v2_1/test_drivers/regression_tests/Makefile @@ -0,0 +1,6 @@ +CC = g++ +CFLAGS = -g -Wall -pedantic +REG_FILES = r1_0Beta1/*h r1_0Beta2/*.h + +regressiontest: reg_tests_driver.cpp ../../source/utf8.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h $(REG_FILES) + $(CC) $(CFLAGS) reg_tests_driver.cpp -o regressiontest diff --git a/v2_1/test_drivers/regression_tests/r1_0Beta1/basic_functionality.h b/v2_1/test_drivers/regression_tests/r1_0Beta1/basic_functionality.h new file mode 100644 index 0000000..15c45ce --- /dev/null +++ b/v2_1/test_drivers/regression_tests/r1_0Beta1/basic_functionality.h @@ -0,0 +1,16 @@ +#include "../../../source/utf8.h" +using namespace utf8; + +// [ 1528544 ] utf::next does not work correctly for 4-byte sequences +void id_1528544() +{ + unsigned char u10ffff[] = {0xf4, 0x8f, 0xbf, 0xbf}; + unsigned char* uit = u10ffff; + try { + unsigned int cp_u10ffff = next (uit, u10ffff + 4); + check (cp_u10ffff == 0x10ffff); + } + catch (std::exception&) { + check (false); + } +} diff --git a/v2_1/test_drivers/regression_tests/r1_0Beta1/invalidutf8.h b/v2_1/test_drivers/regression_tests/r1_0Beta1/invalidutf8.h new file mode 100644 index 0000000..f284a9c --- /dev/null +++ b/v2_1/test_drivers/regression_tests/r1_0Beta1/invalidutf8.h @@ -0,0 +1,96 @@ +#include "../../../source/utf8.h" +using namespace utf8; + +/// [ 1524459 ] utf8::is_valid does not report some illegal code positions +void id_1524459() +{ +// Single UTF-16 surrogates: +unsigned char ud800[] = {0xed, 0xa0, 0x80}; +check (!is_valid(ud800, ud800 + 3)); + +unsigned char udb7f[] = {0xed, 0xad, 0xbf}; +check (!is_valid(udb7f, udb7f + 3)); + +unsigned char udb80[] = {0xed, 0xae, 0x80}; +check (!is_valid(udb80, udb80 + 3)); + +unsigned char udbff[] = {0xed, 0xaf, 0xbf}; +check (!is_valid(udbff, udbff + 3)); + +unsigned char udc00[] = {0xed, 0xb0, 0x80}; +check (!is_valid(udc00, udc00 + 3)); + +unsigned char udf80[] = {0xed, 0xbe, 0x80}; +check (!is_valid(udf80, udf80 + 3)); + +unsigned char udfff[] = {0xed, 0xbf, 0xbf}; +check (!is_valid(udfff, udfff + 3)); + +// Paired UTF-16 surrogates: +unsigned char ud800_dc00[] = {0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80}; +check (!is_valid(ud800_dc00, ud800_dc00 + 6)); + +unsigned char ud800_dfff[] = {0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf}; +check (!is_valid(ud800_dfff, ud800_dfff + 6)); + +unsigned char udb7f_dc00[] = {0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80}; +check (!is_valid(udb7f_dc00, udb7f_dc00 + 6)); + +unsigned char udb7f_dfff[] = {0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf}; +check (!is_valid(udb7f_dfff, udb7f_dfff + 6)); + +unsigned char udb80_dc00[] = {0xed, 0xae, 0x80, 0xed, 0xb0, 0x80}; +check (!is_valid(udb80_dc00, udb80_dc00 + 6)); + +unsigned char udb80_dfff[] = {0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf}; +check (!is_valid(udb80_dfff, udb80_dfff + 6)); + +unsigned char udbff_dc00[] = {0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80}; +check (!is_valid(udbff_dc00, udbff_dc00 + 6)); + +unsigned char udbff_dfff[] = {0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf}; +check (!is_valid(udbff_dfff, udbff_dfff + 6)); + +// Other illegal code points +unsigned char ufffe[] = {0xef, 0xbf, 0xbe}; +check (!is_valid(ufffe, ufffe + 3)); + +unsigned char uffff[] = {0xef, 0xbf, 0xbf}; +check (!is_valid(uffff, uffff + 3)); +} + +// [ 1525236 ] utf8::is_valid does not detect overlong sequences +void id_1525236 () +{ +unsigned char u2f_2bytes[] = {0xc0, 0xaf}; +check (!is_valid(u2f_2bytes, u2f_2bytes + 2)); + +unsigned char u2f_3bytes[] = {0xe0, 0x80, 0xaf}; +check (!is_valid(u2f_3bytes, u2f_3bytes + 3)); + +unsigned char u2f_4bytes[] = {0xf0, 0x80, 0x80, 0xaf}; +check (!is_valid(u2f_4bytes, u2f_4bytes + 4)); +} + +// [ 1528369 ] utf8::find_invalid does not return the start of a seqence +void id_1528369 () +{ +// incomplete utf sequences +unsigned char utf_incomplete[] = {0xe6, 0x97, 0x0}; +unsigned char* invalid = find_invalid(utf_incomplete, utf_incomplete + 3); +check (invalid == utf_incomplete); + +unsigned char utf_incomplete_two_seqs[] = {0xE6, 0x97, 0xA5, 0xd1, 0x0}; +invalid = find_invalid(utf_incomplete_two_seqs, utf_incomplete_two_seqs + 5); +check (invalid == utf_incomplete_two_seqs + 3); + +// invalid code point +unsigned char udbff[] = {0xed, 0xaf, 0xbf}; +invalid = find_invalid(udbff, udbff + 3); +check (invalid == udbff); + +// overlong sequence +unsigned char u2f_3bytes[] = {0xe0, 0x80, 0xaf}; +invalid = find_invalid(u2f_3bytes, u2f_3bytes + 3); +check (invalid == u2f_3bytes); +} diff --git a/v2_1/test_drivers/regression_tests/r1_0Beta2/basic_functionality.h b/v2_1/test_drivers/regression_tests/r1_0Beta2/basic_functionality.h new file mode 100644 index 0000000..e5b65c5 --- /dev/null +++ b/v2_1/test_drivers/regression_tests/r1_0Beta2/basic_functionality.h @@ -0,0 +1,11 @@ +#include "../../../source/utf8.h" +using namespace utf8; + +// [ 1531740 ] utf8::append does not work correctly for some code points. +void id_1531740() +{ + unsigned cp_u3044 = 0x3044U; + unsigned char u3044[] = {0x0, 0x0, 0x0, 0x0}; + append(cp_u3044, u3044); + check (u3044[0] == 0xe3 && u3044[1] == 0x81 && u3044[2] == 0x84 && u3044[3] == 0); +} diff --git a/v2_1/test_drivers/regression_tests/r1_0Beta3/basic_functionality.h b/v2_1/test_drivers/regression_tests/r1_0Beta3/basic_functionality.h new file mode 100644 index 0000000..40af658 --- /dev/null +++ b/v2_1/test_drivers/regression_tests/r1_0Beta3/basic_functionality.h @@ -0,0 +1,11 @@ +#include "../../../source/utf8.h" +using namespace utf8; + +// [ 1538338 ] unchecked::next does not work correctly for 4-byte sequences. +void id_1538338() +{ + char* four_bytes = "\xf0\x90\x8d\x86"; + char* it = four_bytes; + int cp = unchecked::next(it); + check (cp == 0x10346); +} diff --git a/v2_1/test_drivers/regression_tests/reg_tests_driver.cpp b/v2_1/test_drivers/regression_tests/reg_tests_driver.cpp new file mode 100644 index 0000000..19a5ac3 --- /dev/null +++ b/v2_1/test_drivers/regression_tests/reg_tests_driver.cpp @@ -0,0 +1,40 @@ +#include +using namespace std; + +inline void check_impl (bool condition, const char* file, int line) + { + if (!condition) { + cout << "Check Failed! File: " << file << " Line: " << line << '\n'; + } + } + +#define check(c) check_impl(c, __FILE__, __LINE__); + +// Release 1.0 Beta 1 +#include "r1_0Beta1/invalidutf8.h" +#include "r1_0Beta1/basic_functionality.h" +// Release 1.0 Beta 2 +#include "r1_0Beta2/basic_functionality.h" +// Release 1.0 Beta 3 +#include "r1_0Beta3/basic_functionality.h" + + + +int main() +{ +// Release 1.0 Beta 1 +//r1_0Beta1/invalidutf8.h + id_1524459(); + id_1525236(); + id_1528369(); +//r1_0Beta1/basic_functionality.h + id_1528544(); + +// Release 1.0 Beta 2 +//r1_0Beta2/basic_functionality.h + id_1531740(); + +// Release 1.0 Beta 3 +//r1_0Beta3/basic_functionality.h + id_1538338(); +} diff --git a/v2_1/test_drivers/runtests.pl b/v2_1/test_drivers/runtests.pl new file mode 100755 index 0000000..77b5402 --- /dev/null +++ b/v2_1/test_drivers/runtests.pl @@ -0,0 +1,50 @@ +#! /usr/bin/perl + +$report_name = './report.txt'; + +# Create the report file +die if !open(REPORT, ">$report_name"); + +# First, build everything +print REPORT "==================Make output==================\n"; +close($report_name); + +`make >> $report_name`; +die if !open(REPORT, ">>$report_name"); +print REPORT "==================End of Make output==================\n"; +print REPORT "\n"; +# Now, run individual tests and create the report +print REPORT "==================Smoke Test ==================\n"; +close($report_name); +chdir 'smoke_test'; +`./smoketest >> ../$report_name`; +chdir '..'; +die if !open(REPORT, ">>$report_name"); +print REPORT "==================End of smoke test==================\n"; +print REPORT "\n"; +print REPORT "==================Regression Test ==================\n"; +close($report_name); +chdir 'regression_tests'; +`./regressiontest >> ../$report_name`; +chdir '..'; +die if !open(REPORT, ">>$report_name"); +print REPORT "==================End of regression test==================\n"; +print REPORT "\n"; +print REPORT "==================Negative Test ==================\n"; +close($report_name); +chdir 'negative'; +`./negative >> ../$report_name`; +chdir '..'; +die if !open(REPORT, ">>$report_name"); +print REPORT "==================End of negative test==================\n"; +print REPORT "\n"; +print REPORT "==================utf8reader runs ==================\n"; +close($report_name); +chdir 'utf8reader'; +`./utf8reader ../../test_data/utf8samples/quickbrown.txt >> ../$report_name`; +`./utf8reader ../../test_data/utf8samples/Unicode_transcriptions.html >> ../$report_name`; +`./utf8reader ../../test_data/utf8samples/UTF-8-demo.txt >> ../$report_name`; +chdir '..'; +die if !open(REPORT, ">>$report_name"); +print REPORT "==================End of utf8reader runs==================\n"; +print REPORT "\n"; diff --git a/v2_1/test_drivers/smoke_test/Makefile b/v2_1/test_drivers/smoke_test/Makefile new file mode 100644 index 0000000..f66f3af --- /dev/null +++ b/v2_1/test_drivers/smoke_test/Makefile @@ -0,0 +1,5 @@ +CC = g++ +CFLAGS = -g -Wall + +smoketest: test.cpp ../../source/utf8.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h + $(CC) $(CFLAGS) test.cpp -osmoketest diff --git a/v2_1/test_drivers/smoke_test/test.cpp b/v2_1/test_drivers/smoke_test/test.cpp new file mode 100644 index 0000000..c50e136 --- /dev/null +++ b/v2_1/test_drivers/smoke_test/test.cpp @@ -0,0 +1,280 @@ + +#include +#include +#include "../../source/utf8.h" +using namespace utf8; +using namespace std; + +int main() +{ + //append + unsigned char u[5] = {0,0,0,0,0}; + + unsigned char* end = append(0x0448, u); + assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0); + + end = append(0x65e5, u); + assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0); + + end = append(0x3044, u); + assert (u[0] == 0xe3 && u[1] == 0x81 && u[2] == 0x84 && u[3] == 0 && u[4] == 0); + + end = append(0x10346, u); + assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0); + + + + //next + char* twochars = "\xe6\x97\xa5\xd1\x88"; + char* w = twochars; + int cp = next(w, twochars + 6); + assert (cp == 0x65e5); + assert (w == twochars + 3); + + char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88"; + w = threechars; + cp = next(w, threechars + 9); + assert (cp == 0x10346); + assert (w == threechars + 4); + cp = next(w, threechars + 9); + assert (cp == 0x65e5); + assert (w == threechars + 7); + cp = next(w, threechars + 9); + assert (cp == 0x0448); + assert (w == threechars + 9); + + //prior + w = twochars + 3; + cp = prior (w, twochars); + assert (cp == 0x65e5); + assert (w == twochars); + + w = threechars + 9; + cp = prior(w, threechars); + assert (cp == 0x0448); + assert (w == threechars + 7); + cp = prior(w, threechars); + assert (cp == 0x65e5); + assert (w == threechars + 4); + cp = prior(w, threechars); + assert (cp == 0x10346); + assert (w == threechars); + + //previous (deprecated) + w = twochars + 3; + cp = previous (w, twochars - 1); + assert (cp == 0x65e5); + assert (w == twochars); + + w = threechars + 9; + cp = previous(w, threechars - 1); + assert (cp == 0x0448); + assert (w == threechars + 7); + cp = previous(w, threechars -1); + assert (cp == 0x65e5); + assert (w == threechars + 4); + cp = previous(w, threechars - 1); + assert (cp == 0x10346); + assert (w == threechars); + + // advance + w = twochars; + advance (w, 2, twochars + 6); + assert (w == twochars + 5); + + // distance + size_t dist = utf8::distance(twochars, twochars + 5); + assert (dist == 2); + + // utf32to8 + int utf32string[] = {0x448, 0x65E5, 0x10346, 0}; + vector utf8result; + utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); + assert (utf8result.size() == 9); + // try it with the return value; + char* utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]); + assert (utf8_end == &utf8result[0] + 9); + + //utf8to32 + vector utf32result; + utf8to32(twochars, twochars + 5, back_inserter(utf32result)); + assert (utf32result.size() == 2); + // try it with the return value; + int* utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]); + assert (utf32_end == &utf32result[0] + 2); + + //utf16to8 + unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e}; + utf8result.clear(); + utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); + assert (utf8result.size() == 10); + // try it with the return value; + utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]); + assert (utf8_end == &utf8result[0] + 10); + + //utf8to16 + char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; + vector utf16result; + utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); + assert (utf16result.size() == 4); + assert (utf16result[2] == 0xd834); + assert (utf16result[3] == 0xdd1e); + // try it with the return value; + unsigned short* utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]); + assert (utf16_end == &utf16result[0] + 4); + + //find_invalid + char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa"; + char* invalid = find_invalid(utf_invalid, utf_invalid + 6); + assert (invalid == utf_invalid + 5); + + //is_valid + bool bvalid = is_valid(utf_invalid, utf_invalid + 6); + assert (bvalid == false); + bvalid = is_valid(utf8_with_surrogates, utf8_with_surrogates + 9); + assert (bvalid == true); + + //is_bom + unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf}; + bool bbom = is_bom(byte_order_mark); + assert (bbom == true); + + //replace_invalid + char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z"; + vector replace_invalid_result; + replace_invalid (invalid_sequence, invalid_sequence + sizeof(invalid_sequence), back_inserter(replace_invalid_result), '?'); + bvalid = is_valid(replace_invalid_result.begin(), replace_invalid_result.end()); + assert (bvalid); + char* fixed_invalid_sequence = "a????z"; + assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(), fixed_invalid_sequence)); + + // iterator + utf8::iterator it(threechars, threechars, threechars + 9); + utf8::iterator it2 = it; + assert (it2 == it); + assert (*it == 0x10346); + assert (*(++it) == 0x65e5); + assert ((*it++) == 0x65e5); + assert (*it == 0x0448); + assert (it != it2); + utf8::iterator endit (threechars + 9, threechars, threechars + 9); + assert (++it == endit); + assert (*(--it) == 0x0448); + assert ((*it--) == 0x0448); + assert (*it == 0x65e5); + assert (--it == utf8::iterator(threechars, threechars, threechars + 9)); + assert (*it == 0x10346); + + ////////////////////////////////////////////////////////// + //// Unchecked variants + ////////////////////////////////////////////////////////// + + //append + memset(u, 0, 5); + end = unchecked::append(0x0448, u); + assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0); + + end = unchecked::append(0x65e5, u); + assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0); + + end = unchecked::append(0x10346, u); + assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0); + + //next + w = twochars; + cp = unchecked::next(w); + assert (cp == 0x65e5); + assert (w == twochars + 3); + + w = threechars; + cp = unchecked::next(w); + assert (cp == 0x10346); + assert (w == threechars + 4); + cp = unchecked::next(w); + assert (cp == 0x65e5); + assert (w == threechars + 7); + cp = unchecked::next(w); + assert (cp == 0x0448); + assert (w == threechars + 9); + + + //previous (calls prior internally) + + w = twochars + 3; + cp = unchecked::previous (w); + assert (cp == 0x65e5); + assert (w == twochars); + + w = threechars + 9; + cp = unchecked::previous(w); + assert (cp == 0x0448); + assert (w == threechars + 7); + cp = unchecked::previous(w); + assert (cp == 0x65e5); + assert (w == threechars + 4); + cp = unchecked::previous(w); + assert (cp == 0x10346); + assert (w == threechars); + + // advance + w = twochars; + unchecked::advance (w, 2); + assert (w == twochars + 5); + + // distance + dist = unchecked::distance(twochars, twochars + 5); + assert (dist == 2); + + // utf32to8 + utf8result.clear(); + unchecked::utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); + assert (utf8result.size() == 9); + // try it with the return value; + utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]); + assert(utf8_end == &utf8result[0] + 9); + + //utf8to32 + utf32result.clear(); + unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result)); + assert (utf32result.size() == 2); + // try it with the return value; + utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]); + assert (utf32_end == &utf32result[0] + 2); + + //utf16to8 + utf8result.clear(); + unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); + assert (utf8result.size() == 10); + // try it with the return value; + utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]); + assert (utf8_end == &utf8result[0] + 10); + + //utf8to16 + utf16result.clear(); + unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); + assert (utf16result.size() == 4); + assert (utf16result[2] == 0xd834); + assert (utf16result[3] == 0xdd1e); + // try it with the return value; + utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]); + assert (utf16_end == &utf16result[0] + 4); + + // iterator + utf8::unchecked::iterator un_it(threechars); + utf8::unchecked::iterator un_it2 = un_it; + assert (un_it2 == un_it); + assert (*un_it == 0x10346); + assert (*(++un_it) == 0x65e5); + assert ((*un_it++) == 0x65e5); + assert (un_it != un_it2); + assert (*un_it == 0x0448); + utf8::unchecked::iterator un_endit (threechars + 9); + assert (++un_it == un_endit); + assert (*(--un_it) == 0x0448); + assert ((*un_it--) == 0x0448); + assert (*un_it == 0x65e5); + assert (--un_it == utf8::unchecked::iterator(threechars)); + assert (*un_it == 0x10346); +} + + diff --git a/v2_1/test_drivers/utf8reader/Makefile b/v2_1/test_drivers/utf8reader/Makefile new file mode 100644 index 0000000..29a9aa8 --- /dev/null +++ b/v2_1/test_drivers/utf8reader/Makefile @@ -0,0 +1,5 @@ +CC = g++ +CFLAGS = -g -Wall -pedantic + +utf8readertest: utf8reader.cpp ../../source/utf8.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h + $(CC) $(CFLAGS) utf8reader.cpp -o utf8reader diff --git a/v2_1/test_drivers/utf8reader/utf8reader.cpp b/v2_1/test_drivers/utf8reader/utf8reader.cpp new file mode 100644 index 0000000..ca85286 --- /dev/null +++ b/v2_1/test_drivers/utf8reader/utf8reader.cpp @@ -0,0 +1,156 @@ +#include "../../source/utf8.h" +using namespace utf8; + +#include +#include +#include +#include +using namespace std; + +int main(int argc, char** argv) +{ + if (argc != 2) { + cout << "\nUsage: utfreader filename\n"; + return 0; + } + const char* TEST_FILE_PATH = argv[1]; + // Open the test file + ifstream fs8(TEST_FILE_PATH); + if (!fs8.is_open()) { + cout << "Could not open " << TEST_FILE_PATH << endl; + return 0; + } + + // Read it line by line + unsigned int line_count = 0; + char byte; + while (!fs8.eof()) { + string line; + while ((byte = static_cast(fs8.get())) != '\n' && !fs8.eof()) + line.push_back(byte); + + line_count++; + // Play around with each line and convert it to utf16 + string::iterator line_start = line.begin(); + string::iterator line_end = line.end(); + line_end = find_invalid(line_start, line_end); + if (line_end != line.end()) + cout << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end) << '\n'; + + // Convert it to utf-16 and write to the file + vector utf16_line; + utf8to16(line_start, line_end, back_inserter(utf16_line)); + + // Back to utf-8 and compare it to the original line. + string back_to_utf8; + utf16to8(utf16_line.begin(), utf16_line.end(), back_inserter(back_to_utf8)); + if (back_to_utf8.compare(string(line_start, line_end)) != 0) + cout << "Line " << line_count << ": Conversion to UTF-16 and back failed" << '\n'; + + // Now, convert it to utf-32, back to utf-8 and compare + vector utf32_line; + utf8to32(line_start, line_end, back_inserter(utf32_line)); + back_to_utf8.clear(); + utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8)); + if (back_to_utf8.compare(string(line_start, line_end)) != 0) + cout << "Line " << line_count << ": Conversion to UTF-32 and back failed" << '\n'; + + // Now, iterate and back + unsigned char_count = 0; + string::iterator it = line_start; + while (it != line_end) { + next(it, line_end); + char_count++; + } + if (char_count != utf32_line.size()) + cout << "Line " << line_count << ": Error in iterating with next - wrong number of characters" << '\n'; + + string::iterator adv_it = line_start; + utf8::advance(adv_it, char_count, line_end); + if (adv_it != line_end) + cout << "Line " << line_count << ": Error in advance function" << '\n'; + + if (string::size_type(utf8::distance(line_start, line_end)) != char_count) + cout << "Line " << line_count << ": Error in distance function" << '\n'; + + while (it != line_start) { + previous(it, line.rend().base()); + char_count--; + } + if (char_count != 0) + cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n'; + + // Try utf8::iterator + utf8::iterator u8it(line_start, line_start, line_end); + if (!utf32_line.empty() && *u8it != utf32_line.at(0)) + cout << "Line " << line_count << ": Error in utf::iterator * operator" << '\n'; + if (std::distance(u8it, utf8::iterator(line_end, line_start, line_end)) != static_cast(utf32_line.size())) + cout << "Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters" << '\n'; + + std::advance(u8it, utf32_line.size()); + if (u8it != utf8::iterator(line_end, line_start, line_end)) + cout << "Line " << line_count << ": Error in using utf::iterator with std::advance" << '\n'; + + + //======================== Now, the unchecked versions ====================== + // Convert it to utf-16 and compare to the checked version + vector utf16_line_unchecked; + unchecked::utf8to16(line_start, line_end, back_inserter(utf16_line_unchecked)); + + if (utf16_line != utf16_line_unchecked) + cout << "Line " << line_count << ": Error in unchecked::utf8to16" << '\n'; + + // Back to utf-8 and compare it to the original line. + back_to_utf8.clear(); + unchecked::utf16to8(utf16_line_unchecked.begin(), utf16_line_unchecked.end(), back_inserter(back_to_utf8)); + if (back_to_utf8.compare(string(line_start, line_end)) != 0) + cout << "Line " << line_count << ": Unchecked conversion to UTF-16 and back failed" << '\n'; + + // Now, convert it to utf-32, back to utf-8 and compare + vector utf32_line_unchecked; + unchecked::utf8to32(line_start, line_end, back_inserter(utf32_line_unchecked)); + if (utf32_line != utf32_line_unchecked) + cout << "Line " << line_count << ": Error in unchecked::utf8to32" << '\n'; + + back_to_utf8.clear(); + unchecked::utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8)); + if (back_to_utf8.compare(string(line_start, line_end)) != 0) + cout << "Line " << line_count << ": Unchecked conversion to UTF-32 and back failed" << '\n'; + + // Now, iterate and back + char_count = 0; + it = line_start; + while (it != line_end) { + unchecked::next(it); + char_count++; + } + if (char_count != utf32_line.size()) + cout << "Line " << line_count << ": Error in iterating with unchecked::next - wrong number of characters" << '\n'; + + adv_it = line_start; + utf8::unchecked::advance(adv_it, char_count); + if (adv_it != line_end) + cout << "Line " << line_count << ": Error in unchecked::advance function" << '\n'; + + if (string::size_type(utf8::unchecked::distance(line_start, line_end)) != char_count) + cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n'; + + while (it != line_start) { + unchecked::previous(it); + char_count--; + } + if (char_count != 0) + cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n'; + + // Try utf8::unchecked::iterator + utf8::unchecked::iterator un_u8it(line_start); + if (!utf32_line.empty() && *un_u8it != utf32_line.at(0)) + cout << "Line " << line_count << ": Error in utf::unchecked::iterator * operator" << '\n'; + if (std::distance(un_u8it, utf8::unchecked::iterator(line_end)) != static_cast(utf32_line.size())) + cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::distance - wrong number of characters" << '\n'; + + std::advance(un_u8it, utf32_line.size()); + if (un_u8it != utf8::unchecked::iterator(line_end)) + cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::advance" << '\n'; + } +}